Workflow: RichCluster Example

Sarah Hong

2025-06-24

Load or Generate Clustering Data

# Load the example clustering result, or rebuild it from the raw inputs.
#
# Args:
#   from_scratch: If FALSE (the default), read the precomputed
#     "cluster_result.rds" from the "extdata" directory of the installed
#     RichCluster package. If TRUE, read "go1.txt" and "go2.txt" from that
#     same directory and cluster them with RichCluster::cluster().
#
# Returns:
#   The object stored in the .rds file, or the value returned by
#   RichCluster::cluster().
#
# Errors:
#   Stops with "no file found" when RichCluster is not installed or the
#   requested example file is not shipped with it.
load_cluster_data <- function(from_scratch = FALSE) {
  # mustWork = TRUE: by default system.file() returns "" for a missing
  # package or file, which only fails later inside readRDS()/read.delim()
  # with an error that does not mention the real cause.
  extdata_path <- function(file_name) {
    system.file(
      "extdata", file_name,
      package = "RichCluster", mustWork = TRUE
    )
  }

  if (!from_scratch) {
    return(readRDS(extdata_path("cluster_result.rds")))
  }

  enrichment_results <- list(
    read.delim(extdata_path("go1.txt")),
    read.delim(extdata_path("go2.txt"))
  )
  # Labels for the two inputs, in the same order as enrichment_results.
  rr_names <- c("7mo_DEG", "7mo_DMR")

  cluster_result <- RichCluster::cluster(
    enrichment_results,
    df_names = rr_names, min_terms = 5,
    distance_metric = "kappa", distance_cutoff = 0.5,
    linkage_method = "average", linkage_cutoff = 0.5
  )
  cluster_result
}

# Use the precomputed result shipped with the package; pass
# from_scratch = TRUE to re-run the clustering on the raw input files instead.
cluster_result <- load_cluster_data(from_scratch = FALSE)

Cluster-Level Visualizations

# Build each cluster-level figure from `cluster_result`, then print it by
# evaluating the object on its own line.
# Lines starting with "#>" are console output captured when this document was
# rendered; they are not statements to run.
# NOTE(review): the functions are called unqualified, so this presumably
# relies on RichCluster being attached earlier (not visible here) -- confirm.

# Heatmap variant (judging by the function name). In the captured output the
# warning follows the constructor call, before the object is printed.
c_hmap <- cluster_hmap(cluster_result)
#> Warning in fix_not_all_unique(rownames(x)): Not all the values are unique - manually added prefix numbers
c_hmap
# Bar variant (judging by the function name). Here the warnings follow the
# print step rather than the constructor call.
c_bar <- cluster_bar(cluster_result)
c_bar
#> Warning: Ignoring 83 observations
#> Warning: Ignoring 26 observations
# Dot variant (judging by the function name). Same "Ignoring ..." warnings as
# the bar variant, plus two `line.width` warnings.
c_dot <- cluster_dot(cluster_result)
c_dot
#> Warning: Ignoring 83 observations
#> Warning: Ignoring 26 observations
#> Warning: `line.width` does not currently support multiple values.
#> Warning: `line.width` does not currently support multiple values.

Term-Level Visualizations

# Character vectors handed to term_hmap() as its second and third arguments.
# Presumably these are cluster names and term names present in
# `cluster_result` -- TODO confirm against the exported table.
clusters <- c("mating plug formation", "regulation of protein refolding", "regulation of plasma cell differentiation")
terms <- c("neuroblast proliferation", "regulation of tissue remodeling", "protein secretion")

# Term-level heatmap with value_type = "Padj". The call itself prints the
# label shown on the "#>" line below.
t_hmap <- term_hmap(cluster_result, clusters, terms, value_type = "Padj")
#> [1] "-log10(Padj)"
t_hmap
# NOTE(review): 48 is presumably a cluster number (the exported table has an
# integer `Cluster` column) -- confirm. The same value is reused for
# term_dot() below.
t_bar <- term_bar(cluster_result, 48)
t_bar
#> Warning: Ignoring 21 observations
#> Warning: Can't display both discrete & non-discrete data on same axis
t_dot <- term_dot(cluster_result, 48)
t_dot
#> Warning: Ignoring 21 observations
#> Warning: `line.width` does not currently support multiple values.
#> Warning: Can't display both discrete & non-discrete data on same axis

Export Results

# Convert the clustering result to a table and preview its first rows.
# The "#>" lines that follow are the captured head() output; because the table
# is wide, the printed columns wrap into several stacked groups.
cluster_df <- export_df(cluster_result)
head(cluster_df)
#>   Cluster                          ClusterName
#> 1       1 skeletal muscle cell differentiation
#> 2       1 skeletal muscle cell differentiation
#> 3       1 skeletal muscle cell differentiation
#> 4       1 skeletal muscle cell differentiation
#> 5       1 skeletal muscle cell differentiation
#> 6       1 skeletal muscle cell differentiation
#>                                                                              Term Annot_7mo_DEG Annotated_7mo_DEG
#> 1                                               regulation of cellular senescence    GO:2000772                27
#> 2                                               skeletal muscle organ development    GO:0060538               195
#> 3                                           cellular response to oxidative stress    GO:0034599               233
#> 4                                              type B pancreatic cell development    GO:0003323                21
#> 5 negative regulation of intracellular steroid hormone receptor signaling pathway    GO:0033144                32
#> 6                                                        regulation of cell aging    GO:0090342                34
#>   Significant_7mo_DEG Pvalue_7mo_DEG Padj_7mo_DEG GeneID_7mo_DEG Annot_7mo_DMR Annotated_7mo_DMR Significant_7mo_DMR
#> 1                   1     0.02727905   0.11680502          Arntl          <NA>                NA                  NA
#> 2                   2     0.01683191   0.09358109   Arntl,Ankrd2          <NA>                NA                  NA
#> 3                   2     0.02349213   0.11116047   Arntl,Ankrd2          <NA>                NA                  NA
#> 4                   1     0.02127938   0.10351325          Arntl          <NA>                NA                  NA
#> 5                   1     0.03225184   0.12253346          Arntl          <NA>                NA                  NA
#> 6                   1     0.03423413   0.12592837          Arntl          <NA>                NA                  NA
#>   Pvalue_7mo_DMR Padj_7mo_DMR GeneID_7mo_DMR       GeneID     Pvalue       Padj
#> 1             NA           NA           <NA>        Arntl 0.02727905 0.11680502
#> 2             NA           NA           <NA> Arntl,Ankrd2 0.01683191 0.09358109
#> 3             NA           NA           <NA> Arntl,Ankrd2 0.02349213 0.11116047
#> 4             NA           NA           <NA>        Arntl 0.02127938 0.10351325
#> 5             NA           NA           <NA>        Arntl 0.03225184 0.12253346
#> 6             NA           NA           <NA>        Arntl 0.03423413 0.12592837